We will use PyMC3 to estimate the posterior PDF for the true rating of a set of artificial teams using data from a simulated season. The idea is to test our model on a small set of artificial data where we know the answer to begin with, so we can learn about MCMC and make sure our model is sensible.
In [1]:
import pandas as pd
import os
import numpy as np
import pymc3 as pm
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
true_rating = {
'All Stars': 2.0,
'Average': 0.0,
'Just Having Fun': -1.2,
}
true_index = {
0: 'All Stars',
1: 'Average',
2: 'Just Having Fun',
}
n_teams = len(true_rating)
team_numbers = range(n_teams)
team_names = [true_index[i] for i in team_numbers]
In [3]:
true_rating
Out[3]:
In [4]:
team_names
Out[4]:
We have two teams, one of which is much better than the other. Let's make a simulated season between these teams.
In [5]:
season_length = [5, 20, 100]
traces = []
simulatedSeasons = []
for n_games in season_length:
games = range(n_games)
database = []
for game in games:
game_row = {}
matchup = np.random.choice(team_numbers, size=2, replace=False)
team0 = true_index[matchup[0]]
team1 = true_index[matchup[1]]
game_row['Team A'] = team0
game_row['Team B'] = team1
game_row['Index A'] = matchup[0]
game_row['Index B'] = matchup[1]
deltaRating = true_rating[team0] - true_rating[team1]
p = 1 / (1 + np.exp(-deltaRating))
randomNumber = np.random.random()
outcome_A = p > randomNumber
game_row['Team A Wins'] = outcome_A
database.append(game_row)
simulatedSeason = pd.DataFrame(database)
simulatedSeasons.append(simulatedSeason)
with pm.Model() as model:
rating = pm.Normal('rating', mu=0, sd=1, shape=n_teams)
deltaRating = rating[simulatedSeason['Index A'].values] - rating[simulatedSeason['Index B'].values]
p = 1 / (1 + np.exp(-deltaRating))
win = pm.Bernoulli('win', p, observed=simulatedSeason['Team A Wins'].values)
trace = pm.sample(1000)
traces.append(trace)
In [136]:
simulatedSeasons[1].groupby('Team A').sum()
Out[136]:
In [135]:
1 / (1 + np.exp(-2))
Out[135]:
In [6]:
sns.set_context('poster')
f, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 15))
# plt.figure(figsize=(10, 5))
for ax_index, n_games in enumerate(season_length):
ax = axes[ax_index]
for team_number in team_numbers:
rating_posterior = traces[ax_index]['rating'][:, team_number]
team_name = true_index[team_number]
sns.distplot(rating_posterior, label=team_name, ax=ax)
ax.legend()
ax.set_xlabel('Rating')
ax.set_ylabel('Density')
ax.set_title("Season length: {} games".format(n_games))
plt.tight_layout()
In [97]:
simulatedSeason = pd.DataFrame(database)
In [98]:
simulatedSeason
Out[98]:
In [75]:
project_dir = '/Users/rbussman/Projects/BUDA/buda-ratings'
scores_dir = os.path.join(project_dir, 'data', 'raw', 'game_scores')
simulatedSeason.to_csv(os.path.join(scores_dir, 'artificial_scores_big.csv'))
Prior on each team is a normal distribution with mean of 0 and standard deviation of 1.
In [99]:
simulatedSeason.shape
Out[99]:
In [100]:
with pm.Model() as model:
rating = pm.Normal('rating', mu=0, sd=1, shape=n_teams)
deltaRating = rating[simulatedSeason['Index A'].values] - rating[simulatedSeason['Index B'].values]
p = 1 / (1 + np.exp(-deltaRating))
win = pm.Bernoulli('win', p, observed=simulatedSeason['Team A Wins'].values)
In [101]:
with model:
trace = pm.sample(1000)
In [113]:
sns.set_context('poster')
plt.figure(figsize=(10, 5))
for team_number in team_numbers:
rating_posterior = trace['rating'][:, team_number]
team_name = true_index[team_number]
sns.distplot(rating_posterior, label=team_name)
plt.legend()
plt.xlabel('Rating')
plt.ylabel('Density')
Out[113]:
Hmm, something looks odd here. The posterior pdf for these two teams has significant overlap. Does this mean that our model is not sure about which team is better?
In [114]:
sns.set_context('poster')
plt.figure(figsize=(10, 5))
for team_number in team_numbers[:-1]:
rating_posterior = trace['rating'][:, team_number] - trace['rating'][:, -1]
team_name = true_index[team_number]
sns.distplot(rating_posterior, label="{} - {}".format(team_name, true_index[team_numbers[-1]]))
plt.legend()
plt.xlabel('Rating')
plt.ylabel('Density')
Out[114]:
In [119]:
gt0 = rating_posterior > 0
print("Percentage of samples where 'All Stars' have a higher rating than 'Just Having Fun': {:.2f}%".format(
100. * rating_posterior[gt0].size / rating_posterior.size))
Ah, so the posterior pdf is actually quite clear: There is a 99.22% chance that "All Stars" are better than "Just Having Fun".
How does our confidence change as a function of the number of games in the season?
In [104]:
rating_posterior
Out[104]:
In [88]:
.75 ** 14
Out[88]:
In [82]:
estimatedratings = trace['rating'].mean(axis=0)
In [83]:
estimatedratings
Out[83]:
In [47]:
for key in true_rating:
print("True: {:.2f}; Estimated: {:.2f}".format((true_rating[key], estimatedratings[key]))
In [22]:
key
Out[22]:
In [ ]: